% dfa-lexer.tex

%%%%%%%%%%%%%%%%%%%%
\begin{frame}{}
  \fig{width = 0.60\textwidth}{figs/dfa-lexer}
\end{frame}
%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%
\begin{frame}{}
  \begin{center}
    \red{\bf 目标: 正则表达式 RE $\implies$ 词法分析器}

    \vspace{0.30cm}
    \fig{width = 0.70\textwidth}{figs/re-dfa-lexer}
    \[
      \textsl{RE} \implies \epsilon\text{-}\textsl{NFA} \implies \textsl{NFA}
    \]

    \vspace{0.50cm}
    \purple{终点固然令人向往, 这一路上的风景更是美不胜收}
  \end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%
\begin{frame}{}
  \begin{definition}[NFA (Nondeteministic Finite Automaton)]
    非确定性有穷自动机 $\mathcal{A}$ 是一个五元组 
    \red{$\mathcal{A} = (\Sigma, S, s_{0}, \delta, F)$}:

    \vspace{0.30cm}
    \begin{enumerate}[(1)]
      \item 字母表 $\Sigma$ ($\epsilon \notin \Sigma$)
      \item \red{\bf 有穷}的状态集合 $S$
      \item \red{唯一}的初始状态 $s_{0} \in S$
      \item 状态转移\red{\bf 函数} $\delta$
        \[
          \delta: S \times (\Sigma \cup \set{\blue{\epsilon}}) \to \blue{2^{S}}
        \]
      \item 接受状态集合 $F \subseteq S$
    \end{enumerate}
  \end{definition}

  \fig{width = 0.70\textwidth}{figs/nfa-abb}
\end{frame}
%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%
\begin{frame}{}
  \begin{columns}
    \column{0.33\textwidth}
      \fig{width = 0.55\textwidth}{figs/rabin}
      \begin{center}
        \teal{Michael O. Rabin (1931 $\sim$)}
      \end{center}
    \column{0.34\textwidth}
      \fig{width = 1.20\textwidth}{figs/fa-paper}
      \begin{center}
        发表于 1959 年; \\[6pt]
        1976年, 共享图灵奖 \\
      \end{center}
    \column{0.33\textwidth}
      \fig{width = 0.60\textwidth}{figs/scott}
      \begin{center}
        \teal{Dana Scott (1932 $\sim$)}
      \end{center}
  \end{columns}

  \vspace{0.30cm}
  \begin{center}
    \begin{quote}
    ``which introduced the idea of \red{\emph{\textbf{nondeterministic machines}}}, \\[6pt]
      which has proved to be an enormously valuable concept.''
    \end{quote}
  \end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%
\begin{frame}{}
  \begin{center}
    (非确定性)有穷自动机是一类极其简单的\red{\bf 计算}装置

    \vspace{0.60cm}
    它可以\red{\bf 识别} (接受/拒绝) $\Sigma$ 上的字符串

    \vspace{0.50cm}
    \begin{definition}[接受 (Accept)]
      (非确定性)有穷自动机 $\mathcal{A}$ 接受字符串$x$,
      当且仅当\red{\bf 存在}一条从开始状态 $s_{0}$ 到\red{\bf 某个}接受状态 $f \in F$、
      标号为 $x$ 的路径。
    \end{definition}

    \vspace{0.80cm}
    因此, \purple{$\mathcal{A}$ 定义了一种{\bf 语言} $L(\mathcal{A})$}: 
    它能接受的所有字符串构成的集合
  \end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%
\begin{frame}{}
  \fig{width = 0.70\textwidth}{figs/nfa-abb}

  \[
    aabb \in L(\mathcal{A}) \qquad ababab \notin L(\mathcal{A})
  \]

  \pause
  \[
    L(\mathcal{A}) = L((a|b)^{\ast}abb)
  \]
\end{frame}
%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%
\begin{frame}{}
  \begin{center}
    关于语言与自动机 $\mathcal{A}$ 的两个最基本的问题:

    \vspace{0.60cm}
    \[
      \text{给定字符串 $x$}, x \in L(\mathcal{A})?
    \]

    \vspace{0.30cm}
    $L(\mathcal{A})$ 究竟是什么?
  \end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%
\begin{frame}{}
  \fig{width = 0.45\textwidth}{figs/nfa-aa-bb}
  \[
    aaa \in \mathcal{A}?
  \]

  \[
    L(\mathcal{A}) = \pause L((aa^{\ast}|bb^{\ast}))
  \]
\end{frame}
%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%
\begin{frame}{}
  \begin{definition}[DFA (Deterministic Finite Automaton)]
    确定性有穷自动机 $\mathcal{A}$ 是一个五元组 
    \red{$\mathcal{A} = (\Sigma, S, s_{0}, \delta, F)$}:

    \vspace{0.30cm}
    \begin{enumerate}[(1)]
      \item 字母表 $\Sigma$ ($\epsilon \notin \Sigma$)
      \item \red{\bf 有穷}的状态集合 $S$
      \item \red{唯一}的初始状态 $s_{0} \in S$
      \item 状态转移\red{\bf 函数} $\delta$
        \[
          \delta: S \times \blue{\Sigma} \to \blue{S}
        \]
      \item 接受状态集合 $F \subseteq S$
    \end{enumerate}
  \end{definition}

  \fig{width = 0.50\textwidth}{figs/dfa-abb}

  \pause
  \begin{center}
    \blue{\bf 约定:} 所有没有对应出边的字符默认指向一个不存在的{\bf ``死状态''}
  \end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%
\begin{frame}{}
  \fig{width = 0.50\textwidth}{figs/dfa-abb}

  \[
    L(\mathcal{A}) = \pause L((a|b)^{\ast}abb)
  \]

  \pause
  \fig{width = 0.50\textwidth}{figs/nfa-abb}
\end{frame}
%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%
\begin{frame}{}
  \begin{center}
    NFA 与 DFA 的\red{\bf 优缺点}比较:

    \vspace{0.80cm}
    \blue{$x \in L(\mathcal{A}):$} 
    DFA 易于实现; NFA 不易实现

    \vspace{0.30cm}
    \blue{$L(\mathcal{A}):$} 
    NFA 简洁易于理解; DFA 状态多转移多

    \vspace{0.80cm}
    \red{\bf 取长补短:} 

    \vspace{0.30cm}
    用 NFA 描述, 用 NFA 实现; 

    \vspace{0.30cm}
    从 NFA 到 DFA 的转化自动完成
  \end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%
\begin{frame}{}
  \fig{width = 0.70\textwidth}{figs/re-dfa-lexer}
\end{frame}
%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%
\begin{frame}{}
  \begin{center}
    从 RE 到 NFA: \red{\bf Thompson 构造法}
  \end{center}
  
  \pause
  \vspace{0.20cm}
  \fig{width = 0.60\textwidth}{figs/Thompson-Ritchie-Unix}
  \begin{center}
    Turing Award, 1983
  \end{center}

  % \begin{columns}
  %   \column{0.50\textwidth}
  %     \fig{width = 0.50\textwidth}{figs/Thompson}
  %     \begin{center}
  %       Ken Thompson (1943 $\sim$)
  %     \end{center}
  %   \column{0.50\textwidth}
  %     \fig{width = 0.50\textwidth}{figs/Ritchie}
  %     \begin{center}
  %       Dennis Ritchie (1941 $\sim$ 2011)
  %     \end{center}
  % \end{columns}
  
  % \begin{quote}
  %   \centering
  %   {\small ``For their development of generic operating systems theory \\
  %   and specifically for the implementation of the \red{UNIX} operating system.''
  %   \hfill --- Turing Award, 1983}
  % \end{quote}
\end{frame}
%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%
\begin{frame}{}
  \begin{center}
    Thompson 构造法的基本思想: \red{\bf 按结构归纳}
  \end{center}

  \begin{definition}[正则表达式]
    给定字母表 $\Sigma$, $\Sigma$ 上的正则表达式\red{\bf 由且仅由}以下规则定义:
    \begin{enumerate}[(1)]
      \item $\epsilon$ 是正则表达式;
      \item $\forall a \in \Sigma$, $a$ 是正则表达式;
      \item 如果 $r$ 是正则表达式, 则 $(r)$ 是正则表达式;
      \item 如果 $r$ 与 $s$ 是正则表达式, 则 $r|s$, $rs$, $r^{\ast}$ 也是正则表达式。
    \end{enumerate}
  \end{definition}
\end{frame}
%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%
\begin{frame}{}
  \begin{center}
    $\epsilon$ 是正则表达式。

    \pause
    \vspace{0.80cm}
    \fig{width = 0.40\textwidth}{figs/epsilon-nfa}
  \end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%
\begin{frame}{}
  \begin{center}
    $a \in \Sigma$ 是正则表达式。

    \pause
    \vspace{0.80cm}
    \fig{width = 0.40\textwidth}{figs/symbol-nfa}
  \end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%
\begin{frame}{}
  \begin{center}
    如果 $s$, $t$ 是正则表达式, 则 $s | t$ 是正则表达式。

    \pause
    \vspace{0.80cm}
    \fig{width = 0.50\textwidth}{figs/or-nfa}
  \end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%
\begin{frame}{}
  \begin{center}
    如果 $s$, $t$ 是正则表达式, 则 $st$ 是正则表达式。

    \pause
    \vspace{0.80cm}
    \fig{width = 0.60\textwidth}{figs/concat-nfa}
  \end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%
\begin{frame}{}
  \begin{center}
    如果 $s$, 则 $s^{\ast}$ 是正则表达式。

    \pause
    \vspace{0.80cm}
    \fig{width = 0.60\textwidth}{figs/kleene-star-nfa}
  \end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%
\begin{frame}{}
  \begin{center}
    Thompson 构造法得到的 NFA 的\red{\bf 性质}以及\red{复杂度分析}
  \end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%
\begin{frame}{}
  \[
    r = (a | b)^{\ast} abb
  \]

  \pause
  \begin{columns}
    \column{0.30\textwidth}
      \fig{width = 0.80\textwidth}{figs/r1-nfa-ex}
    \column{0.30\textwidth}
      \fig{width = 0.80\textwidth}{figs/r2-nfa-ex}
    \column{0.40\textwidth}
      \pause
      \fig{width = 0.95\textwidth}{figs/r3-nfa-ex}
  \end{columns}

  \begin{columns}
    \column{0.50\textwidth}
      \pause
      \fig{width = 0.95\textwidth}{figs/r5-nfa-ex}
    \column{0.50\textwidth}
      \pause
      \fig{width = 0.95\textwidth}{figs/r6-nfa-ex}
  \end{columns}
\end{frame}
%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%
\begin{frame}{}
  \begin{center}
    从 NFA 到 DFA 的转换: \red{\bf 子集构造法} (Subset/Powerset Construction)

    \vspace{0.50cm}
    \begin{columns}
      \column{0.50\textwidth}
        \fig{width = 0.45\textwidth}{figs/rabin}
        \begin{center}
          \teal{Michael O. Rabin (1931 $\sim$)}
        \end{center}
      \column{0.50\textwidth}
        \fig{width = 0.50\textwidth}{figs/scott}
        \begin{center}
          \teal{Dana Scott (1932 $\sim$)}
        \end{center}
    \end{columns}
  \end{center}

  \vspace{0.20cm}
  \begin{center}
    \blue{\bf 思想: 用 DFA 模拟 NFA}
  \end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%
\begin{frame}{}
  \begin{center}
    \red{\bf 用 DFA 模拟 NFA}

    \fig{width = 0.50\textwidth}{figs/nfa-abb}
    \[
      \blue{L(\mathcal{A}) = L((a|b)^{\ast}abb)}
    \]
    \fig{width = 0.50\textwidth}{figs/dfa-abb}
  \end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%
\begin{frame}{}
  \begin{center}
    \fig{width = 0.75\textwidth}{figs/nfa-abb-epsilon}
    \[
      \blue{L(\mathcal{A}) = L((a|b)^{\ast}abb)}
    \]
    \fig{width = 0.50\textwidth}{figs/dfa-abb-subset}
  \end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%
\begin{frame}{}
  \fig{width = 0.50\textwidth}{figs/nfa-dfa-table}
  \fig{width = 0.60\textwidth}{figs/dfa-abb-subset}
\end{frame}
%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%
\begin{frame}{}
  \[
    \epsilon\text{-closure}(s)
  \]

  \pause
  \vspace{0.60cm}
  \[
    \epsilon\text{-closure}(T) = \bigcup_{s \in T} \epsilon\text{-closure}(s)
  \]

  \pause
  \vspace{0.60cm}
  \[
    \text{move}(T, a) = \bigcup_{s \in T} \delta(s, a)
  \]
\end{frame}
%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%
\begin{frame}{}
  \begin{center}
    子集构造法 ($\mathcal{N} \implies \mathcal{D}$) 实现时采用\red{\bf 标记搜索}过程

    \fig{width = 0.90\textwidth}{figs/subset-construction}

    \red{\bf 接受状态集 
      $F_{\mathcal{D}} = \set{s \in S_{\mathcal{D}} \mid \exists f \in F_{\mathcal{N}}.\; f \in s}$}
  \end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%
\begin{frame}{}
  \begin{center}
    子集构造法的\red{\bf 复杂度分析}: \\
    ($|S_{\mathcal{N}}| = n$)

    \[
      \Theta(2^n)
    \]

    \pause
    \vspace{0.30cm}
    最坏情况下, 复杂度为 $\Omega(2^n)$
  \end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%
\begin{frame}{}
  \begin{center}
    ``长度为 $m \ge n$个字符的$a$, $b$串, 且倒数第$n$个字符是$a$''

    \pause
    \[
      L_{n} = (a | b)^{\ast} a (a | b)^{n-1}
    \]

    \pause
    \fig{width = 0.80\textwidth}{figs/nfa-Ln}

    \pause
    \vspace{0.30cm}
    \blue{\bf 作业:} $m = n = 3$
  \end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%
\begin{frame}{}
  \fig{width = 0.70\textwidth}{figs/re-dfa-lexer}

  \vspace{0.30cm}
  \begin{center}
    DFA 最小化
  \end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%
\begin{frame}{}
  \fig{width = 0.50\textwidth}{figs/dfa-abb}

  \[
    \blue{L(\mathcal{A}) = L((a|b)^{\ast}abb)}
  \]

  \fig{width = 0.50\textwidth}{figs/dfa-abb-subset}

  \begin{center}
    子集构造法得到的 DFA
  \end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%
\begin{frame}{}
  \begin{center}
    \red{\bf DFA 最小化算法}基本思想: \blue{\bf 等价}的状态可以合并

    \vspace{0.30cm}
    \fig{width = 0.30\textwidth}{figs/Hopcroft}
    \teal{John Hopcroft (1939 $\sim$)}

    \vspace{0.30cm}
    \begin{quote}
      \centering
      ``for fundamental achievements in the design and analysis \\
      of \red{\emph{\textbf{algorithms and data structures}}}'' \\
      \hfill --- Turing Award, 1986
    \end{quote}
  \end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%
\begin{frame}{}
  \begin{center}
    如何定义\red{\bf 等价}状态?
  \end{center}

  \fig{width = 0.50\textwidth}{figs/dfa-abb-subset}

  \[
    s \sim t \iff \forall a \in \Sigma.\; 
      (s \rel{a} s') \land (t \rel{a} t') \land (\red{s' \sim t'}).
  \]

  \pause
  \vspace{0.20cm}
  \begin{center}
    \blue{\bf 从哪里开始?}
  \end{center}

  \pause
  \vspace{-0.50cm}
  \[
    \purple{\forall f, f' \in F.\; f \sim f'.}
  \]
\end{frame}
%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%
\begin{frame}{}
  \[
    s \sim t \iff \forall a \in \Sigma.\; 
      (s \rel{a} s') \land (t \rel{a} t') \land (\red{s' \sim t'}).
  \]

  \begin{center}
    \red{\bf 划分操作}
  \end{center}

  \begin{columns}
    \column{0.50\textwidth}
      \fig{width = 1.00\textwidth}{figs/dfa-abb-subset}
    \column{0.50\textwidth}
      \[
        \Pi = \set{F, S \setminus F}
      \]
      \pause
  \end{columns}
\end{frame}
%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%
\begin{frame}{}
  \begin{center}
    DFA 最小化\red{\bf 等价状态划分}方法
  \end{center}

  \[
    \Pi = \set{F, S \setminus F}
  \]
  
  \fig{width = 0.85\textwidth}{figs/partition}

  \begin{center}
    直到再也无法划分为止
  \end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%
\begin{frame}{}
  特定于词法分析器的最小化方法
\end{frame}
%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%
\begin{frame}{}
  最小化的唯一性?
\end{frame}
%%%%%%%%%%%%%%%%%%%%